First, I loaded the tidyverse along with the mapping and plotting packages, and defined the function created by Benjamin for reading the county-level data.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(usmap)
library(ggplot2)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read one county-level cancer CSV and reduce it to a tidy four-column table.
#
# Arguments:
#   file_path_name  path to the CSV file; its first 8 lines are skipped
#   state           value written to the `state` column of every row
#   cancer_type     value written to the `cancer_type` column of every row
#
# Returns a table with the columns state, county, cancer_type and
# age_adjusted_incidence_rate (coerced to numeric).
read_and_clean_county_data <- function(file_path_name, state, cancer_type) {
  raw_tbl <- read_csv(file_path_name, skip = 8)
  named_tbl <- janitor::clean_names(raw_tbl)

  named_tbl %>%
    select(county, age_adjusted_incidence_rate_rate_note_cases_per_100_000) %>%
    transmute(
      state = state,
      # Drop the trailing 10 characters of each county label
      # (presumably a " County(n)" suffix -- TODO confirm against the raw file).
      county = gsub('.{10}$', '', county),
      cancer_type = cancer_type,
      age_adjusted_incidence_rate =
        as.numeric(age_adjusted_incidence_rate_rate_note_cases_per_100_000)
    )
}

Next, I loaded in the county data sets using this function:

# County-level lung cancer data for Pennsylvania.
# Source: https://statecancerprofiles.cancer.gov/

pa_county_lc <- read_and_clean_county_data(
  './data/pa_lung_data_county.csv',
  state = "PA",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing value in any column ...
  drop_na() %>%
  # ... then discard the first two remaining rows (presumably the national
  # and state summary rows -- TODO confirm against the CSV).
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 87 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# County-level lung cancer data for Ohio.
# Source: https://statecancerprofiles.cancer.gov/

oh_county_lc <- read_and_clean_county_data(
  './data/oh_lung_data_county.csv',
  state = "OH",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing value in any column ...
  drop_na() %>%
  # ... then discard the first two remaining rows (presumably the national
  # and state summary rows -- TODO confirm against the CSV).
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 108 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# County-level lung cancer data for New York.
# Source: https://statecancerprofiles.cancer.gov/

ny_county_lc <- read_and_clean_county_data(
  './data/ny_lung_data_county.csv',
  state = "NY",
  cancer_type = "lung"
) %>%
  # Keep only rows with no missing value in any column ...
  drop_na() %>%
  # ... then discard the first two remaining rows (presumably the national
  # and state summary rows -- TODO confirm against the CSV).
  slice(-(1:2))
## New names:
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...5`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...6`
## * `Lower 95% Confidence Interval` -> `Lower 95% Confidence Interval...13`
## * `Upper 95% Confidence Interval` -> `Upper 95% Confidence Interval...14`
## Rows: 82 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): County, FIPS, Met Healthy People Objective of ***?, CI*Rank([rank n...
## dbl (7): Age-Adjusted Incidence Rate([rate note]) - cases per 100,000, Lower...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
# Open each cleaned county-level table in the data viewer for a visual check.
view(oh_county_lc)
view(pa_county_lc)
view(ny_county_lc)

Finally, I loaded in the state-level lung cancer data, then merged and plotted the county- and state-level results:

# State-level lung cancer data for Pennsylvania.
# Source: https://www.phaim1.health.pa.gov/EDD/
#
# The file is read as semicolon-delimited with the first 3 lines skipped.
# Only `year` and `rate_ratio_result` are kept; the latter is renamed to
# `age_adjusted_incidence_rate` so the table lines up with the other states.

pa_cancer <- read.csv('./data/pa_state_lung.csv', sep = ";", header = TRUE, skip = 3) %>%
  janitor::clean_names() %>%
  select(year, age_adjusted_incidence_rate = rate_ratio_result) %>%
  # Swap the first "," in each value for "." before converting to numeric
  # (presumably the export uses a decimal comma -- TODO confirm).
  mutate(across(everything(), ~ as.numeric(str_replace(.x, pattern = ",", replacement = ".")))) %>%
  # Keep the result a tibble, as the previous map_df()-based version did.
  as_tibble() %>%
  add_column(state = "PA")



# State-level lung cancer data for Ohio.
# Source: https://publicapps.odh.ohio.gov/EDW/DataBrowser/Browse/StateLayoutLockdownCancers
#
# Keeps the year and age-adjusted rate columns, converts both to numeric,
# tags every row with state = "OH", then removes duplicate rows and rows
# containing missing values.

oh_cancer <- read.csv('./data/oh_state_lung.csv', sep = ";") %>%
  janitor::clean_names() %>%
  select(
    year = cancer_year_year,
    age_adjusted_incidence_rate = age_adjusted_rate
  ) %>%
  # Swap the first "," in each value for "." so as.numeric() can parse it;
  # values that still fail to parse become NA (with a coercion warning).
  map_df(str_replace, pattern = ",", replacement = ".") %>%
  map_df(as.numeric) %>%
  add_column(state = "OH") %>%
  distinct() %>%
  drop_na()
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
# State-level lung cancer data for New York.
# Source: https://www.health.ny.gov/statistics/cancer/registry/table2/tb2lungnys.htm
#
# Skips the first 2 lines of the file, keeps only the first three columns,
# and reduces them to `year` and `age_adjusted_incidence_rate`, tagging every
# row with state = "NY".

ny_cancer <- read.csv("./data/ny_state_lung.csv", skip = 2) %>%
  select(1:3) %>%
  janitor::clean_names() %>%
  # After clean_names() the year column is called `x`.
  select(
    year = x,
    age_adjusted_incidence_rate = rate_per_100_000_population
  ) %>%
  add_column(state = "NY")
# Merge the county-level lung cancer tables and attach FIPS codes.

# FIPS lookup restricted to the three states of interest. The `name` column
# is renamed to `county` so it can serve as a merge key, and "St Lawrence"
# is respelled "St. Lawrence" (presumably to match the spelling in the
# cancer data -- TODO confirm).
fips_codes <- read.csv('./data/fips_codes.csv') %>%
  janitor::clean_names() %>%
  filter(state %in% c("NY", "PA", "OH")) %>%
  rename(county = name) %>%
  mutate(county = replace(county, county == "St Lawrence", "St. Lawrence"))

# Stack the three per-state county tables.
county_lc <- rbind(pa_county_lc, oh_county_lc, ny_county_lc)

# merge() keeps only the rows whose (state, county) pair occurs in both tables.
lc_fips <- merge(county_lc, fips_codes, by = c("state", "county"))

# lung cancer incidence maps by county

# Draw a county-level choropleth of `age_adjusted_incidence_rate`.
#
# Arguments:
#   data    data frame passed to plot_usmap(); must contain the
#           `age_adjusted_incidence_rate` column (plot_usmap() presumably also
#           needs a `fips` column to match counties -- TODO confirm)
#   states  character vector of state abbreviations to include in the map
#   title   plot title
#
# Returns the ggplot object produced by plot_usmap() with the shared fill
# scale and theme applied.
plot_lc_county_map <- function(data, states, title) {
  plot_usmap(
    regions = "counties",
    include = states,
    data = data,
    values = "age_adjusted_incidence_rate",
    color = "black"
  ) +
    labs(title = title) +
    scale_fill_continuous(
      low = "#FFE9C7", high = "#FF0000",
      # `labels` is spelled out in full rather than relying on partial
      # matching of `label`.
      name = "age_adjusted_incidence_rate", labels = scales::comma
    ) +
    theme(plot.background = element_rect(), legend.position = "right")
}

ny_map <- plot_lc_county_map(
  lc_fips, c("NY"),
  "New York Age-Adjusted Lung Cancer Rates, 2014-2018"
)

oh_map <- plot_lc_county_map(
  lc_fips, c("OH"),
  "Ohio Age-Adjusted Lung Cancer Rates, 2014-2018"
)

pa_map <- plot_lc_county_map(
  lc_fips, c("PA"),
  "Pennsylvania Age-Adjusted Lung Cancer Rates, 2014-2018"
)

ny_oh_pa_lungcancer <- plot_lc_county_map(
  lc_fips, c("NY", "OH", "PA"),
  "NY, OH, and PA Age-Adjusted Lung Cancer Rates, 2014-2018"
)


pa_map

oh_map

ny_map

ny_oh_pa_lungcancer

# Merge the state-level lung cancer tables.

# Long format: one row per (year, state).
state_lc <- rbind(pa_cancer, oh_cancer, ny_cancer)
view(state_lc)

# Wide format: one row per year and one rate column per state, each renamed
# with an _AAIR suffix.
state_lc_wide <- pivot_wider(
  state_lc,
  names_from = state,
  values_from = age_adjusted_incidence_rate
) %>%
  rename(PA_AAIR = PA, NY_AAIR = NY, OH_AAIR = OH)

view(state_lc_wide)
# Interactive line chart of the state-level age-adjusted incidence rates,
# one line per state, with range-selector buttons and a range slider on the
# x axis.
# NOTE(review): range-selector buttons with step = "year" are designed for
# date axes; `year` looks numeric here -- confirm the buttons behave as
# intended.
fig <- plot_ly(state_lc_wide, x = ~year) %>%
  add_lines(y = ~PA_AAIR, name = "Pennsylvania") %>%
  add_lines(y = ~NY_AAIR, name = "New York") %>%
  add_lines(y = ~OH_AAIR, name = "Ohio") %>%
  layout(
    title = "Lung Cancer Age-Adjusted Incidence Rates",
    xaxis = list(
      rangeselector = list(
        buttons = list(
          list(count = 1, label = "1 yr", step = "year", stepmode = "backward"),
          list(count = 5, label = "5 yr", step = "year", stepmode = "backward"),
          list(count = 10, label = "10 yr", step = "year", stepmode = "backward"),
          list(step = "all")
        )
      ),
      rangeslider = list(type = "year")
    ),
    # The closing parenthesis was missing from this axis title.
    yaxis = list(title = "Age-Adjusted Incidence Rate (per 100,000)")
  )

fig
# Rebuild the state-level line chart with an alternative title and y-axis
# label. `rangeslider` is a sibling of `rangeselector` inside `xaxis`, and
# `yaxis` is a sibling of `xaxis` inside layout(); previously misplaced
# parentheses nested them inside `buttons` and `xaxis` respectively.
fig <- plot_ly(state_lc_wide, x = ~year) %>%
  add_lines(y = ~PA_AAIR, name = "Pennsylvania") %>%
  add_lines(y = ~NY_AAIR, name = "New York") %>%
  add_lines(y = ~OH_AAIR, name = "Ohio") %>%
  layout(
    title = "State Age-Adjusted Lung Cancer Incidence Rates",
    xaxis = list(
      rangeselector = list(
        buttons = list(
          list(count = 1, label = "1 yr", step = "year", stepmode = "backward"),
          list(count = 5, label = "5 yr", step = "year", stepmode = "backward"),
          list(count = 10, label = "10 yr", step = "year", stepmode = "backward")
        )
      ),
      rangeslider = list(type = "year")
    ),
    yaxis = list(title = "Age Adjusted Incidence Rate")
  )